
* CleanEquifaxOutliers
* --------------------
* Replaces flagged outlier observations of each variable in varlist with a
* smoothed value and keeps the original series in <var>_unadj.
*
* Arguments
*   varlist      variables to clean; each is renamed to <var>_unadj and a new
*                adjusted copy is created under the original name
*   ID()         panel identifier passed to xtset
*   OUTLIER()    indicator variable; rows with value 1 are treated as outliers
*   START()      first year; outliers in this year are set to missing
*   END()        last year to adjust
*   BY()         grouping variable(s) over which yearly means are computed
*   GRAPHTYPE()  optional: "by" (one graph per level of BY) or "national"
*   GRAPHWEIGHT() optional: pweight variable used for mean-collapsed graphs
*   DATE()       optional: time variable for xtset (default: mdate)
*   COMPANY()    optional: label inserted in the exported graph file names
*
* Requirements visible in the code: a variable named `year' must exist, the
* 12-period lag/lead must correspond to "same period, adjacent year"
* (NOTE(review): presumably monthly data -- confirm), and the global
* $figures_tables must be set when graphs are requested.
*
* Adjustment rule for an outlier row in year Y (Y = START+1 ... END):
*   1. L12.value * (mean_Y / mean_{Y-1})  when all three pieces are available
*   2. mean_Y                             when the lag or mean_{Y-1} is missing
*   3. mean_{Y-1}                         when mean_Y is missing
* where mean_Y is the BY-group mean over non-outlier rows in Y, and mean_{Y-1}
* is the BY-group mean over rows in Y-1 whose observation 12 periods later is
* not an outlier.
cap program drop CleanEquifaxOutliers
program define CleanEquifaxOutliers
	syntax varlist, ID(string) OUTLIER(string) START(real) END(real) BY(string) [GRAPHTYPE(string) GRAPHWEIGHT(string) DATE(string) COMPANY(string)]

	if "`date'" == "" {
		local date mdate
	}

	* Optional weight clause: only spliced into collapse when a weight was given,
	* so omitting GRAPHWEIGHT() no longer yields the invalid "[pw = ]".
	local wexp
	if "`graphweight'" != "" {
		local wexp "[pw = `graphweight']"
	}

	* Temporary variables avoid clobbering user variables with the same names.
	tempvar next_clean prev_mean cur_mean

	* Flag rows usable as the comparison base for the following year:
	* the observation 12 periods ahead exists and is not an outlier.
	xtset `id' `date'
	gen byte `next_clean' = F12.`outlier' == 0

	* First year that can be adjusted relative to a previous year.
	local first_adj = `start' + 1

	foreach var of varlist `varlist' {

		* Keep the raw series and work on a copy under the original name.
		rename `var' `var'_unadj
		gen `var' = `var'_unadj

		* No prior year exists for the start year, so outliers are blanked.
		replace `var' = . if `outlier' == 1 & year == `start'

		* forvalues runs zero times when END <= START.
		forvalues y = `first_adj'/`end' {

			* Only rows with outlier == 1 are ever modified, so count those
			* directly; a year with no observations is skipped cleanly.
			quietly count if `outlier' == 1 & year == `y'
			if r(N) > 0 {

				local prev = `y' - 1

				* Group mean for last year, restricted to comparison rows.
				* egen gives a missing mean for groups with no qualifying rows
				* instead of aborting as collapse does on an empty dataset.
				bysort `by': egen double `prev_mean' = mean(cond(year == `prev' & `next_clean' == 1, `var', .))

				* Group mean for this year over non-outlier rows.
				bysort `by': egen double `cur_mean' = mean(cond(year == `y' & `outlier' == 0, `var', .))

				* bysort changed the sort order; restore panel order so the
				* time-series operators below can be used.
				quietly xtset `id' `date'

				* Adjust (same precedence of rules as documented in the header).
				local target "`outlier' == 1 & year == `y'"
				replace `var' = L12.`var' * (`cur_mean' / `prev_mean') if `target' & !missing(L12.`var') & !missing(`cur_mean') & !missing(`prev_mean')
				replace `var' = `cur_mean' if `target' & (missing(L12.`var') | missing(`prev_mean'))
				replace `var' = `prev_mean' if `target' & missing(`cur_mean')

				drop `prev_mean' `cur_mean'

			}
		}

		* Graph to check
		if "`graphtype'" == "by" {
			* levelsof below takes a single variable, so this branch expects
			* BY() to name one variable.
			preserve
				drop if missing(`var')
				* Totals/frequencies are summed; everything else is averaged.
				if regexm("`var'", "tot") | regexm("`var'", "freq") {
					collapse (sum) `var' `var'_unadj, by(`date' `outlier' `by')
				}
				else {
					collapse (mean) `var' `var'_unadj `wexp', by(`date' `outlier' `by')
				}
				levelsof `by', local(bylist)
				foreach type of local bylist {
					graph tw (line `var' `date') (line `var'_unadj `date') (scatter `var'_unadj `date' if `outlier' == 1) if `by' == `type', ///
					legend(order(1 "Smoothed variable" 2 "Original variable" 3 "Outliers")) ///
					ytitle(`var')
					graph export "$figures_tables/equifax/quality_checks/`var'_`company'_`by'_`type'_adj.png", replace
				}
			restore
		}
		else if "`graphtype'" == "national" {
			preserve
				drop if missing(`var')
				* NOTE(review): the "by" branch also sums variables matching
				* "freq"; this branch only sums "tot" -- confirm whether the
				* difference is intended.
				if regexm("`var'", "tot") {
					collapse (sum) `var' `var'_unadj, by(`date' `outlier')
				}
				else {
					collapse (mean) `var' `var'_unadj `wexp', by(`date' `outlier')
				}
				graph tw (line `var' `date') (line `var'_unadj `date') (scatter `var'_unadj `date' if `outlier' == 1), ///
				legend(order(1 "Smoothed variable" 2 "Original variable" 3 "Outliers")) ///
				ytitle(`var')
				graph export "$figures_tables/equifax/quality_checks/`var'_`company'_adj.png", replace
			restore
		}

	}

	drop `next_clean'

end

